*****************************************************************
*This file cleans enrollment rates at the state level from IPEDS*
*****************************************************************

* Load the reshaped fall-enrollment totals and attach institution locations
* (by unitid x yearGroup) and institution-level controls (by unitid).
use "$clean_data_education/fallEnrollmentTotalsReshaped.dta", clear
* keep(3): locations are needed only for observations that are available in both files;
* some 1990 observations have no location information (_merge == 1) and are discarded here.
merge 1:m unitid yearGroup using "$clean_data_education/locations.dta", keep(3) nogenerate
* keep(1 3): unmatched master rows are retained (the original note attributes them to
* Alaska and similar cases); those rows have missing values for the control variables.
merge m:1 unitid using "$clean_data_education/institutionControls.dta", keep(1 3) keepusing(forProfit public iclevel freshmenFullTimeAvg studentsAvg sector) nogenerate

	* Sample restrictions. Stata treats missing as larger than any number, so the
	* "<" filters below never drop observations whose value is missing.
	drop if nonMainland == 1 | (year == 1990 & mi(czone)) // non-mainland institutions (Alaska and so on), plus 1990 obs that have enrollment details but no commuting-zone information
	drop if yearGroup == 1990
	drop if forProfit == 1
	drop if freshmenFullTimeAvg < 50
	drop if periods < 3


* total11 = total3 + total12 row-wise; with the missing option the result is
* missing (not 0) when both components are missing.
* rowtotal() is the documented name of the legacy rsum() egen function.
egen total11 = rowtotal(total3 total12), missing

*** Split total3 by ownership.
* Each split equals total3 for institutions in the category and 0 otherwise, so that
* summing it over institutions later yields the category total. Institutions with a
* missing public flag contribute 0 to both splits.
gen undPublic  = cond(public == 1, total3, 0)
gen undPrivate = cond(public == 0, total3, 0)

*** Split total3 by type of institution, using the sector code.
* Variable names suggest 1-3 = 4-year, 4-6 = 2-year, 7-9 = less-than-2-year
* (TODO confirm against the IPEDS sector codebook). inrange() is 0 for missing sector.
gen und4Y  = cond(inrange(sector, 1, 3), total3, 0)
gen und2Y  = cond(inrange(sector, 4, 6), total3, 0)
gen undL2Y = cond(inrange(sector, 7, 9), total3, 0)

** Collapse to obtain aggregated values at the state x yearGroup level.
* collapse overwrites variable labels with "(stat) varname", so every label is stashed
* in a local macro named l<varname> beforehand; unlabeled variables fall back to their name.
foreach v of varlist * {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
}
* region, division and name_czone take the value of the last observation in each
* state-yearGroup cell; enrollment totals and splits are summed.
* NOTE(review): name_czone presumably varies within a state, in which case the value
* kept here depends on the current sort order -- confirm it is not used downstream.
collapse (last) region division name_czone (sum) total* und*, by(statefip yearGroup)
* Restore the stashed labels. Compound quotes keep this safe for labels that
* themselves contain double quotes.
foreach v of varlist * {
	label variable `v' `"`l`v''"'
}

**** Ratios and delta
* Attach population by state and yearGroup.
* assert(2 3): the merge errors out if any state-yearGroup cell in memory has no
* population row. keep(3) then discards population rows without enrollment data,
* since the enrollment data do not cover every state-yearGroup in the population file.
merge 1:1 statefip yearGroup using "$clean_data_lmarket/census_populationDataState.dta", assert(2 3) keep(3) nogenerate


* Ratios: express every enrollment total and split as a share of ipums_pop.
foreach enr of varlist total* und* {
	replace `enr' = `enr' / ipums_pop
}

** Keep only the identifiers and the variables that were turned into ratios.
keep statefip yearGroup region division name_czone total* und*

* delta
* d indexes the four yearGroups consecutively so the lag operator below compares
* each yearGroup with the preceding one.
sort statefip yearGroup
gen d = .
	replace d = 1 if yearGroup == 1994
	replace d = 2 if yearGroup == 2000
	replace d = 3 if yearGroup == 2007
	replace d = 4 if yearGroup == 2014

	* Stash the ratio levels keyed by the period's start year, so that the
	* beginning-of-period ratio ends up on the same row as the delta.
	* A Stata tempfile is used instead of a named file in the output directory:
	* it is removed automatically when the do-file ends, even after an error.
	tempfile startLevels
	preserve
	gen year = 1990 if yearGroup == 1994
	replace year = 2000 if yearGroup == 2000
	replace year = 2008 if yearGroup == 2007
	drop yearGroup d
	drop if mi(year)
	save "`startLevels'"
	restore

***
* Remember the label of each ratio (or its name if unlabeled) to reuse on its delta.
foreach v of varlist total* und* {
	local l`v' : variable label `v'
	if `"`l`v''"' == "" {
		local l`v' "`v'"
	}
}

* Change between consecutive yearGroups, multiplied by 100.
* The first yearGroup of each state has no lag, so its delta is missing.
xtset statefip d
foreach i of varlist total* und* {
	gen dEnr_`i' = (`i' - L.`i') * 100
}

* Compound quotes keep the labeling safe for labels containing double quotes.
foreach v of varlist total* und* {
	label variable dEnr_`v' `"`l`v''"'
}
***

xtset, clear
drop d total* und*

* generate time variable needed by Ben's code
* Each delta row is keyed by the start year of the period it closes, using the same
* yearGroup -> year mapping as the stashed levels (1994 -> 1990, 2000 -> 2000, 2007 -> 2008).
gen year = 1990 if yearGroup == 2000
	replace year = 2000 if yearGroup == 2007
	replace year = 2008 if yearGroup == 2014
	drop if mi(year)

* keep(3): a state-year with a start-of-period ratio but no delta (_merge == 2)
* has only one ratio, so no delta is possible and the row is dropped.
merge 1:1 statefip year using "`startLevels'", keep(3) nogenerate

save "$final_data_outcomes/IPEDS_EnrollmentState.dta", replace
